#import sys
#print(sys.executable)
#!python --version
#!pip install GDAL-3.4.2-cp38-cp38-win_amd64.whl
#!pip install Fiona-1.8.21-cp38-cp38-win_amd64.whl
#!pip install pyproj-3.3.1-cp38-cp38-win_amd64.whl
#!pip install Shapely-1.8.1.post1-cp38-cp38-win_amd64.whl
#!pip install geopandas
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from urllib.request import urlopen
import json
import geopandas as gpd
from IPython.core.display import display, HTML
from plotly.offline import download_plotlyjs, init_notebook_mode, plot as px_plot
# Plotly display options; `config` is not referenced again in the visible
# part of this notebook.
config = dict(showLink=False, displayModeBar=False)

# Initialise plotly's offline notebook mode before any figure is drawn.
init_notebook_mode(connected=True)
Import data and preprocess¶
# Load the poverty-measure data set from the working directory.
data = pd.read_csv("NYCgov_Poverty_Measure_Data__2015_.csv")

# Columns kept for the analysis; the order here is the column order of
# every frame built with `data[features]`.
features = [
    'SERIALNO', 'SPORDER', 'AGEP', 'CIT', 'REL', 'SCH',
    'SCHG', 'SCHL', 'SEX', 'ESR', 'LANX', 'ENG',
    'MSP', 'WKHP', 'DIS', 'JWTR', 'NP', 'TEN',
    'HHT', 'AgeCateg', 'Boro', 'CitizenStatus', 'EducAttain', 'Ethnicity',
    'FamType_PU', 'FTPTWork', 'INTP_adj', 'MRGP_adj', 'NYCgov_Income',
    'NYCgov_Pov_Stat', 'NYCgov_REL', 'NYCgov_Threshold', 'Off_Pov_Stat',
    'Off_Threshold', 'OI_adj', 'PA_adj', 'Povunit_ID', 'Povunit_Rel',
    'PreTaxIncome_PU', 'RETP_adj', 'RNTP_adj', 'SEMP_adj', 'SSIP_adj',
    'SSP_adj', 'TotalWorkHrs_PU', 'WAGP_adj',
]
# Recode = the value codes are listed in the data dictionary
# CIT: Citizenship
# REL: relationship, e.g. daughter, son, etc. (ACS code)
# SCH, SCHG: education (SCHG is an ACS code)
# SCHL: educational attainment (ACS code)
# ESR: Employment status (code in dictionary file)
# LANX: language other than English spoken at home
# ENG: ability to speak English
# MSP: Married or not (code in dictionary file)
# MAR: Marital status
# WKHP: hours worked per week
# DIS: disability (Recode)
# JWTR: transportation to work (ACS)
# NP: number of people in household
# TEN: Housing tenure
# FamType_PU: poverty-unit family type (remove immediately)
# FTPTWork: work experience (recode)
# INTP_adj: Income adjusted
# MRGP_adj: Mortgage amount adjusted
# SEMP_adj: self-employed
# SSIP_adj: supplementary income
# SSP_adj: social security income (people who are disabled)
# WAGP_adj: Wages
Visualization¶
Number of healthy trees in each district
Probability of a healthy tree in each district
Histogram of diameter
Histogram of depth
Plot of tree locations (heatmap)
# Per-borough median of every selected column (grouped on the 'Boro' code).
X = data[features]
X_grouped = X.groupby(['Boro']).median()

# Borough outlines: download, reproject to EPSG:4326 and key by borough name.
gdf = (
    gpd.read_file('https://raw.githubusercontent.com/dwillis/nyc-maps/master/boroughs.geojson')
    .to_crs(epsg=4326)
    .set_index('BoroName')
)
# Codes are assigned positionally, i.e. in the file's own feature order.
gdf['BoroCode'] = [5, 4, 2, 3, 1]
gdf = gdf.sort_index()

# Replace the numeric 'Boro' index with borough names so the statistics
# share an index with `gdf`.
# NOTE(review): names are assigned by position — assumes the grouped frame
# has exactly these five rows in this order; confirm against the Boro codes.
X_grouped = X_grouped.assign(
    BoroName=['Bronx', 'Brooklyn', 'Manhattan', 'Queens', 'Staten Island']
).set_index('BoroName')

# Column to visualise in the following cells.
att = 'PreTaxIncome_PU'
#### Education
#### Salary
#### ethnicity
#X = data[features]
# Per-borough median of every numeric column. `numeric_only=True` keeps the
# aggregation working on pandas versions that no longer drop non-numeric
# columns silently.
X_grouped = data.groupby(['Boro']).median(numeric_only=True)

# Borough outlines, reprojected to EPSG:4326 and indexed by borough name.
gdf = gpd.read_file('https://raw.githubusercontent.com/dwillis/nyc-maps/master/boroughs.geojson')
gdf.to_crs(epsg=4326, inplace=True)
gdf.set_index('BoroName', inplace=True)
gdf['BoroCode'] = [5,4,2,3,1]
gdf.sort_index(inplace=True)

# Label the grouped rows with borough names so they match `gdf`'s index.
# NOTE(review): names are assigned by position — confirm the Boro code order.
X_grouped['BoroName'] = ['Bronx','Brooklyn','Manhattan','Queens','Staten Island']
X_grouped.set_index('BoroName',inplace=True)

# 'Total_income' is not a column of the data set (selecting it raises
# KeyError); the pre-tax income column used by the other cells is plotted
# instead.
# NOTE(review): confirm this is the intended income measure.
att = 'PreTaxIncome_PU'

fig = px.choropleth_mapbox(
    X_grouped[att].reset_index(),
    geojson=gdf['geometry'],
    # Match rows to polygons by borough name rather than by position.
    locations='BoroName',
    color=att,
    color_continuous_scale="Viridis",
    range_color=(X_grouped[att].min(), X_grouped[att].max()),
    mapbox_style="carto-positron",
    zoom=8.5,
    center={"lat": 40.730610, "lon": -73.935242},
    opacity=0.5,
    # `labels` maps column name -> display text (the original had it reversed,
    # so the label was never applied).
    labels={att: 'Median total income in borough'},
)
px_plot(fig, filename = 'figure_1.html')
# Pass the path via `filename=`; a positional string is treated as raw HTML
# and would only show the text "figure_1.html".
display(HTML(filename='figure_1.html'))
#fig.update_layout(margin={"r":300,"t":100,"l":200,"b":0})
#fig.show("notebook")
#fig.show()
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3360 try:
-> 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
~\anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
~\anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Total_income'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_2640/2293723536.py in <module>
13 X_grouped.set_index('BoroName',inplace=True)
14 att = 'Total_income'
---> 15 fig = px.choropleth_mapbox(X_grouped[att].reset_index(), geojson=gdf['geometry'], locations=gdf.index, color='Total_income',
16 color_continuous_scale="Viridis",
17 range_color=(X_grouped[att].min(),X_grouped[att].max()),
~\anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
3456 if self.columns.nlevels > 1:
3457 return self._getitem_multilevel(key)
-> 3458 indexer = self.columns.get_loc(key)
3459 if is_integer(indexer):
3460 indexer = [indexer]
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
-> 3363 raise KeyError(key) from err
3364
3365 if is_scalar(key) and isna(key) and not self.hasnans:
KeyError: 'Total_income'
#import plotly.graph_objects as go
#fig = go.Choroplethmapbox(X_grouped[att].reset_index(), geojson=gdf['geometry'], locations=gdf.index, color='PreTaxIncome_PU',
# color_continuous_scale="Viridis",
# range_color=(X_grouped[att].min(),X_grouped[att].max()),
# mapbox_style="carto-positron",
# zoom=8.5, center = {"lat": 40.730610, "lon": -73.935242},
# opacity=0.5#,
# #labels={att:att}
# )
#px_plot(fig, filename = 'figure_1.html')
#display(HTML('figure_1.html'))
#fig.update_layout(margin={"r":300,"t":100,"l":200,"b":0})
#fig.show("notebook")
#fig.show()
# Builds a US-states choropleth trace from `result.state` / `result.total`.
# NOTE(review): `result` is not defined anywhere in the visible source and
# `go` is only imported in a later cell — this looks like a pasted example;
# confirm it is still needed. The trace is not assigned to a name or added
# to a figure.
go.Choropleth(
locations=result.state,
z = result.total,
locationmode = 'USA-states', # set of locations match entries in `locations`
marker_line_color='white',
colorbar_title = "Shooting deaths",
)
import plotly.graph_objects as go

# Borough choropleth built with graph_objects instead of plotly.express.
# The original call passed plotly.express keywords (color_continuous_scale,
# range_color, mapbox_style, zoom, center, opacity) to the trace, which
# rejects them with ValueError. Their graph_objects equivalents are:
#   color_continuous_scale -> colorscale
#   range_color            -> zmin / zmax
#   opacity                -> marker.opacity
#   mapbox_style/zoom/center -> layout.mapbox.*
# Relies on `gdf`, `X_grouped` and `att` prepared by an earlier cell.
go.Figure(
    data=go.Choroplethmapbox(
        # The trace needs plain GeoJSON; the GeoSeries exposes it through
        # __geo_interface__, whose features carry the index value as `id`
        # (hence the default feature id key instead of 'properties.id').
        geojson=gdf['geometry'].__geo_interface__,
        locations=gdf.index,
        # One value per location (the original passed a whole DataFrame).
        # NOTE(review): assumes X_grouped rows are in the same order as gdf.
        z=X_grouped[att],
        colorscale="Viridis",
        zmin=X_grouped[att].min(),
        zmax=X_grouped[att].max(),
        marker=dict(opacity=0.5),
    ),
    layout=dict(
        mapbox=dict(
            style="carto-positron",
            zoom=8.5,
            center={"lat": 40.730610, "lon": -73.935242},
        )
    ),
)
#colorbar=dict(thickness=20, x=1.02),
#marker=dict(opacity=0.75, line_width=0.5))
#(X_grouped[att].reset_index(), geojson=gdf['geometry'], locations=gdf.index, color='PreTaxIncome_PU',
# color_continuous_scale="Viridis",
# range_color=(X_grouped[att].min(),X_grouped[att].max()),
# mapbox_style="carto-positron",
# zoom=8.5, center = {"lat": 40.730610, "lon": -73.935242},
# opacity=0.5#,
# #labels={att:att}
# )
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_13724/1173200759.py in <module>
12 zoom=8.5,
13 center = {"lat": 40.730610, "lon": -73.935242},
---> 14 opacity=0.5)#,
15 #colorbar=dict(thickness=20, x=1.02),
16 #marker=dict(opacity=0.75, line_width=0.5))
~\AppData\Roaming\Python\Python37\site-packages\plotly\graph_objs\_choroplethmapbox.py in __init__(self, arg, autocolorscale, below, coloraxis, colorbar, colorscale, customdata, customdatasrc, featureidkey, geojson, hoverinfo, hoverinfosrc, hoverlabel, hovertemplate, hovertemplatesrc, hovertext, hovertextsrc, ids, idssrc, legendgroup, legendgrouptitle, legendrank, locations, locationssrc, marker, meta, metasrc, name, reversescale, selected, selectedpoints, showlegend, showscale, stream, subplot, text, textsrc, uid, uirevision, unselected, visible, z, zauto, zmax, zmid, zmin, zsrc, **kwargs)
2252 # Process unknown kwargs
2253 # ----------------------
-> 2254 self._process_kwargs(**dict(arg, **kwargs))
2255
2256 # Reset skip_invalid
~\AppData\Roaming\Python\Python37\site-packages\plotly\basedatatypes.py in _process_kwargs(self, **kwargs)
4343 self[k] = v
4344 elif not self._skip_invalid:
-> 4345 raise err
4346 # No need to call _raise_on_invalid_property_error here,
4347 # because we have it set up so that the singular case of calling
ValueError: Invalid property specified for object of type plotly.graph_objs.Choroplethmapbox: 'color'
Did you mean "below"?
Valid properties:
autocolorscale
Determines whether the colorscale is a default palette
(`autocolorscale: true`) or the palette determined by
`colorscale`. In case `colorscale` is unspecified or
`autocolorscale` is true, the default palette will be
chosen according to whether numbers in the `color`
array are all positive, all negative or mixed.
below
Determines if the choropleth polygons will be inserted
before the layer with the specified ID. By default,
choroplethmapbox traces are placed above the water
layers. If set to '', the layer will be inserted above
every existing layer.
coloraxis
Sets a reference to a shared color axis. References to
these shared color axes are "coloraxis", "coloraxis2",
"coloraxis3", etc. Settings for these shared color axes
are set in the layout, under `layout.coloraxis`,
`layout.coloraxis2`, etc. Note that multiple color
scales can be linked to the same color axis.
colorbar
:class:`plotly.graph_objects.choroplethmapbox.ColorBar`
instance or dict with compatible properties
colorscale
Sets the colorscale. The colorscale must be an array
containing arrays mapping a normalized value to an rgb,
rgba, hex, hsl, hsv, or named color string. At minimum,
a mapping for the lowest (0) and highest (1) values are
required. For example, `[[0, 'rgb(0,0,255)'], [1,
'rgb(255,0,0)']]`. To control the bounds of the
colorscale in color space, use`zmin` and `zmax`.
Alternatively, `colorscale` may be a palette name
string of the following list: Blackbody,Bluered,Blues,C
ividis,Earth,Electric,Greens,Greys,Hot,Jet,Picnic,Portl
and,Rainbow,RdBu,Reds,Viridis,YlGnBu,YlOrRd.
customdata
Assigns extra data each datum. This may be useful when
listening to hover, click and selection events. Note
that, "scatter" traces also appends customdata items in
the markers DOM elements
customdatasrc
Sets the source reference on Chart Studio Cloud for
`customdata`.
featureidkey
Sets the key in GeoJSON features which is used as id to
match the items included in the `locations` array.
Support nested property, for example "properties.name".
geojson
Sets the GeoJSON data associated with this trace. It
can be set as a valid GeoJSON object or as a URL
string. Note that we only accept GeoJSONs of type
"FeatureCollection" or "Feature" with geometries of
type "Polygon" or "MultiPolygon".
hoverinfo
Determines which trace information appear on hover. If
`none` or `skip` are set, no information is displayed
upon hovering. But, if `none` is set, click and hover
events are still fired.
hoverinfosrc
Sets the source reference on Chart Studio Cloud for
`hoverinfo`.
hoverlabel
:class:`plotly.graph_objects.choroplethmapbox.Hoverlabe
l` instance or dict with compatible properties
hovertemplate
Template string used for rendering the information that
appear on hover box. Note that this will override
`hoverinfo`. Variables are inserted using %{variable},
for example "y: %{y}" as well as %{xother}, {%_xother},
{%_xother_}, {%xother_}. When showing info for several
points, "xother" will be added to those with different
x positions from the first point. An underscore before
or after "(x|y)other" will add a space on that side,
only when this field is shown. Numbers are formatted
using d3-format's syntax %{variable:d3-format}, for
example "Price: %{y:$.2f}".
https://github.com/d3/d3-format/tree/v1.4.5#d3-format
for details on the formatting syntax. Dates are
formatted using d3-time-format's syntax
%{variable|d3-time-format}, for example "Day:
%{2019-01-01|%A}". https://github.com/d3/d3-time-
format/tree/v2.2.3#locale_format for details on the
date formatting syntax. The variables available in
`hovertemplate` are the ones emitted as event data
described at this link
https://plotly.com/javascript/plotlyjs-events/#event-
data. Additionally, every attributes that can be
specified per-point (the ones that are `arrayOk: true`)
are available. variable `properties` Anything contained
in tag `<extra>` is displayed in the secondary box, for
example "<extra>{fullData.name}</extra>". To hide the
secondary box completely, use an empty tag
`<extra></extra>`.
hovertemplatesrc
Sets the source reference on Chart Studio Cloud for
`hovertemplate`.
hovertext
Same as `text`.
hovertextsrc
Sets the source reference on Chart Studio Cloud for
`hovertext`.
ids
Assigns id labels to each datum. These ids for object
constancy of data points during animation. Should be an
array of strings, not numbers or any other type.
idssrc
Sets the source reference on Chart Studio Cloud for
`ids`.
legendgroup
Sets the legend group for this trace. Traces part of
the same legend group hide/show at the same time when
toggling legend items.
legendgrouptitle
:class:`plotly.graph_objects.choroplethmapbox.Legendgro
uptitle` instance or dict with compatible properties
legendrank
Sets the legend rank for this trace. Items and groups
with smaller ranks are presented on top/left side while
with `*reversed* `legend.traceorder` they are on
bottom/right side. The default legendrank is 1000, so
that you can use ranks less than 1000 to place certain
items before all unranked items, and ranks greater than
1000 to go after all unranked items.
locations
Sets which features found in "geojson" to plot using
their feature `id` field.
locationssrc
Sets the source reference on Chart Studio Cloud for
`locations`.
marker
:class:`plotly.graph_objects.choroplethmapbox.Marker`
instance or dict with compatible properties
meta
Assigns extra meta information associated with this
trace that can be used in various text attributes.
Attributes such as trace `name`, graph, axis and
colorbar `title.text`, annotation `text`
`rangeselector`, `updatemenues` and `sliders` `label`
text all support `meta`. To access the trace `meta`
values in an attribute in the same trace, simply use
`%{meta[i]}` where `i` is the index or key of the
`meta` item in question. To access trace `meta` in
layout attributes, use `%{data[n[.meta[i]}` where `i`
is the index or key of the `meta` and `n` is the trace
index.
metasrc
Sets the source reference on Chart Studio Cloud for
`meta`.
name
Sets the trace name. The trace name appear as the
legend item and on hover.
reversescale
Reverses the color mapping if true. If true, `zmin`
will correspond to the last color in the array and
`zmax` will correspond to the first color.
selected
:class:`plotly.graph_objects.choroplethmapbox.Selected`
instance or dict with compatible properties
selectedpoints
Array containing integer indices of selected points.
Has an effect only for traces that support selections.
Note that an empty array means an empty selection where
the `unselected` are turned on for all points, whereas,
any other non-array values means no selection all where
the `selected` and `unselected` styles have no effect.
showlegend
Determines whether or not an item corresponding to this
trace is shown in the legend.
showscale
Determines whether or not a colorbar is displayed for
this trace.
stream
:class:`plotly.graph_objects.choroplethmapbox.Stream`
instance or dict with compatible properties
subplot
Sets a reference between this trace's data coordinates
and a mapbox subplot. If "mapbox" (the default value),
the data refer to `layout.mapbox`. If "mapbox2", the
data refer to `layout.mapbox2`, and so on.
text
Sets the text elements associated with each location.
textsrc
Sets the source reference on Chart Studio Cloud for
`text`.
uid
Assign an id to this trace, Use this to provide object
constancy between traces during animations and
transitions.
uirevision
Controls persistence of some user-driven changes to the
trace: `constraintrange` in `parcoords` traces, as well
as some `editable: true` modifications such as `name`
and `colorbar.title`. Defaults to `layout.uirevision`.
Note that other user-driven trace attribute changes are
controlled by `layout` attributes: `trace.visible` is
controlled by `layout.legend.uirevision`,
`selectedpoints` is controlled by
`layout.selectionrevision`, and `colorbar.(x|y)`
(accessible with `config: {editable: true}`) is
controlled by `layout.editrevision`. Trace changes are
tracked by `uid`, which only falls back on trace index
if no `uid` is provided. So if your app can add/remove
traces before the end of the `data` array, such that
the same trace has a different index, you can still
preserve user-driven changes if you give each trace a
`uid` that stays with it as it moves.
unselected
:class:`plotly.graph_objects.choroplethmapbox.Unselecte
d` instance or dict with compatible properties
visible
Determines whether or not this trace is visible. If
"legendonly", the trace is not drawn, but can appear as
a legend item (provided that the legend itself is
visible).
z
Sets the color values.
zauto
Determines whether or not the color domain is computed
with respect to the input data (here in `z`) or the
bounds set in `zmin` and `zmax` Defaults to `false`
when `zmin` and `zmax` are set by the user.
zmax
Sets the upper bound of the color domain. Value should
have the same units as in `z` and if set, `zmin` must
be set as well.
zmid
Sets the mid-point of the color domain by scaling
`zmin` and/or `zmax` to be equidistant to this point.
Value should have the same units as in `z`. Has no
effect when `zauto` is `false`.
zmin
Sets the lower bound of the color domain. Value should
have the same units as in `z` and if set, `zmax` must
be set as well.
zsrc
Sets the source reference on Chart Studio Cloud for
`z`.
Did you mean "below"?
Bad property path:
color_continuous_scale
^^^^^
#### Education
#### Salary
#### ethnicity
from plotly.subplots import make_subplots
#X = data[features]
# Per-borough count of non-null values in every column.
X_grouped = data.groupby(['Boro']).count()

# Borough outlines: download, reproject to EPSG:4326 and key by borough name.
gdf = (
    gpd.read_file('https://raw.githubusercontent.com/dwillis/nyc-maps/master/boroughs.geojson')
    .to_crs(epsg=4326)
    .set_index('BoroName')
)
gdf['BoroCode'] = [5, 4, 2, 3, 1]
gdf = gdf.sort_index()

# Swap the numeric 'Boro' index for borough names (assigned by position).
X_grouped = X_grouped.assign(
    BoroName=['Bronx', 'Brooklyn', 'Manhattan', 'Queens', 'Staten Island']
).set_index('BoroName')

att = 'PreTaxIncome_PU'

# Two identical choropleth figures, built from the same arguments.
# NOTE(review): the data plotted here are counts, while the label text says
# "Median"; in addition the `labels` dict is keyed by the display text rather
# than by the column name, so it presumably has no effect — confirm which
# aggregation and label are intended before changing either.
fig, fig2 = (
    px.choropleth_mapbox(
        X_grouped[att].reset_index(),
        geojson=gdf['geometry'],
        locations=gdf.index,
        color='PreTaxIncome_PU',
        color_continuous_scale="Viridis",
        range_color=(X_grouped[att].min(), X_grouped[att].max()),
        mapbox_style="carto-positron",
        zoom=8.5,
        center={"lat": 40.730610, "lon": -73.935242},
        opacity=0.5,
        labels={'Median total income in borough': att},
    )
    for _ in range(2)
)
def figures_to_html(figs, filename="dashboard.html"):
    """Write several figures into one HTML page.

    Args:
        figs: Iterable of objects whose ``to_html()`` returns a complete
            HTML document (e.g. plotly figures). Only the content between
            ``<body>`` and ``</body>`` of each document is kept.
        filename: Path of the HTML file to create; an existing file is
            overwritten.
    """
    # The context manager guarantees the file is flushed and closed even if
    # a figure fails to render (the original never closed the handle).
    # UTF-8 is set explicitly so non-ASCII characters in the figure markup
    # do not depend on the platform's default encoding.
    with open(filename, 'w', encoding='utf-8') as dashboard:
        dashboard.write("<html><head></head><body>" + "\n")
        for fig in figs:
            # Strip the per-figure <html>/<head> wrapper and keep the body.
            inner_html = fig.to_html().split('<body>')[1].split('</body>')[0]
            dashboard.write(inner_html)
        dashboard.write("</body></html>" + "\n")
#figures_to_html([fig, fig2])
# Write both figures into one HTML fragment file.
# Mode 'w' (not 'a'): appending would add another copy of both figures to
# the file every time this cell is re-run.
with open('p_graph.html', 'w', encoding='utf-8') as f:
    # plotly.js is loaded from the CDN once, by the first figure; the second
    # figure reuses it.
    f.write(fig.to_html(full_html=False, include_plotlyjs='cdn'))
    f.write(fig2.to_html(full_html=False, include_plotlyjs=False))
# Display after the file is closed so its content is fully written, and pass
# the path via `filename=` — a positional string is treated as raw HTML and
# would only show the text "p_graph.html".
display(HTML(filename='p_graph.html'))
#px_plot([fig,fig2], filename = 'figure_2.html')
#display(HTML('figure_2.html'))
#fig.update_layout(margin={"r":300,"t":100,"l":200,"b":0})
#fig.show("notebook")
#fig.show()
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import pandas as pd
import json
import urllib.request

# Side-by-side choropleth maps of Swiss cantons (2018 vs 2019 columns of a
# synthetic data set), each on its own mapbox subplot.

# A Mapbox access token is optional. Without the token file the cell used to
# crash with FileNotFoundError; now it falls back to a token-free base map.
try:
    with open(".mapbox_token") as token_file:
        mapboxt = token_file.read().rstrip()  # my mapbox_access_token
except FileNotFoundError:
    mapboxt = None

fig = make_subplots(
    rows=1, cols=2, subplot_titles=('Map1', 'Map2'),
    specs=[[{"type": "mapbox"}, {"type": "mapbox"}]]
)

# Canton outlines (GeoJSON) and the values to plot.
swiss_url = 'https://raw.githubusercontent.com/empet/Datasets/master/swiss-cantons.geojson'
with urllib.request.urlopen(swiss_url) as url:
    jdata = json.loads(url.read().decode())
data_url = "https://raw.githubusercontent.com/empet/Datasets/master/Swiss-synthetic-data.csv"
df = pd.read_csv(data_url)

# Left map: 2018 values. `x` places each colorbar next to its own map.
fig.add_trace(go.Choroplethmapbox(geojson=jdata,
                                  locations=df['canton-id'],
                                  z=df['2018'],
                                  featureidkey='properties.id',
                                  colorscale='Viridis',
                                  colorbar=dict(thickness=20, x=0.46),
                                  marker=dict(opacity=0.75)), row=1, col=1)
# Right map: 2019 values.
fig.add_trace(go.Choroplethmapbox(geojson=jdata,
                                  locations=df['canton-id'],
                                  z=df['2019'],
                                  featureidkey='properties.id',
                                  colorscale='matter_r',
                                  colorbar=dict(thickness=20, x=1.02),
                                  marker=dict(opacity=0.75, line_width=0.5)), row=1, col=2);

# Shared camera settings; `accesstoken=None` leaves the token unset.
fig.update_mapboxes(
    bearing=0,
    accesstoken=mapboxt,
    center = {"lat": 46.8181877 , "lon":8.2275124 },
)
fig.update_layout(margin=dict(l=0, r=0, t=50, b=10));

#HERE YOU CAN CONTROL zoom
# The 'light' style is served by Mapbox and needs a token; use the
# token-free 'carto-positron' style for the second map when none is found.
fig.update_layout(mapbox1=dict(zoom=5.9, style='carto-positron'),
                  mapbox2=dict(zoom=5.3,
                               style='light' if mapboxt else 'carto-positron'))
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_13724/1796321988.py in <module>
4 import json
5 import urllib.request
----> 6 mapboxt = open(".mapbox_token").read().rstrip() #my mapbox_access_token
7 fig = make_subplots(
8 rows=1, cols=2, subplot_titles=('Map1', 'Map2'),
FileNotFoundError: [Errno 2] No such file or directory: '.mapbox_token'
# Borough choropleth of the currently selected attribute. Relies on `gdf`,
# `X_grouped` and `att` prepared by an earlier cell.
fig = px.choropleth_mapbox(
    X_grouped[att].reset_index(),
    geojson=gdf['geometry'],
    locations=gdf.index,
    # Colour by the selected attribute instead of a hard-coded column name,
    # so the cell keeps working when `att` is changed.
    color=att,
    color_continuous_scale="Viridis",
    range_color=(X_grouped[att].min(), X_grouped[att].max()),
    mapbox_style="carto-positron",
    zoom=8.5,
    center={"lat": 40.730610, "lon": -73.935242},
    opacity=0.5,
    # `labels` maps column name -> display text (the original had it reversed,
    # so the label was never applied).
    # NOTE(review): the text assumes X_grouped holds per-borough medians.
    labels={att: 'Median total income in borough'},
)
px_plot(fig, filename = 'figure_2.html')
# Pass the path via `filename=`; a positional string is treated as raw HTML
# and would only show the text "figure_2.html".
display(HTML(filename='figure_2.html'))

# Tabular view of the plotted attribute next to educational attainment.
X_grouped[[att,'EducAttain']]
| BoroName | PreTaxIncome_PU | EducAttain |
|---|---|---|
| Bronx | 42553.719 | 2.0 |
| Brooklyn | 60075.840 | 2.0 |
| Manhattan | 75094.797 | 4.0 |
| Queens | 70288.734 | 2.0 |
| Staten Island | 90113.758 | 2.0 |
attributes in ML model¶
NP: Number of people in house hold
Race
Sex
Boro
Age
LANX: language other than English spoken at home
DIS: disability (Recode)